import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML
This notebook shows how to build a Bar Chart Race. It is based on the blog post by Pratap Vardhan, "Bar Chart Race in Python with Matplotlib", posted on September 04, 2019.
As we are following the steps by Pratap Vardhan, we are going to read the city populations dataset with pandas. We are going to work with the columns 'name' (name of the city), 'group' (continent/country that the city belongs to), 'year' and 'value' (population size). Historical data (spanning several years) is very interesting for generating races.
# Load the dataset, keeping only the four columns used in this notebook
data = pd.read_csv(
    'https://git.io/fjpo3',
    usecols=['name', 'group', 'year', 'value'],
)
# display the first three rows of the dataset
data.head(n=3)
In this case, we will look at the top 10 values for a given year. We can change how many entries are shown by changing the number passed to the .head() function.
# year that we are interested in plotting
current_year = 2018
# Select the 10 largest entries for that year. Sort in descending order so
# that .head(10) keeps the biggest values (an ascending sort followed by
# .head(10) would keep the 10 smallest instead), then reverse the rows so the
# table ends up in ascending order with the largest value last.
data_trans = (
    data[data['year'].eq(current_year)]
    .sort_values(by='value', ascending=False)
    .head(10)
    .iloc[::-1]
)
# show the result table
data_trans
We will draw a basic bar chart to show what each frame of the animated chart will look like. We will use the ax.barh(x,y) function to draw a horizontal bar chart.
# Create the figure/axes pair, then draw one horizontal bar per city:
# city names go on the y axis, population values set the bar widths
fig, ax = plt.subplots(figsize=(15, 8))
ax.barh(y=data_trans['name'], width=data_trans['value'])
In the original post, the author mentions that we have to flip the chart because the highest bar is at the bottom. I will show this step in the code as he does, but I will not apply it because my chart is already flipped.
The colors will be assigned based on the groups (you can do some analysis beforehand to find out which unique groups we have, etc.). colors is a dictionary mapping each distinct group to its assigned color; group_lk maps each city name to its group, so together they give the color of each bar.
# map each group to the color used for its bars
colors = {
    'India': '#adb0ff',
    'Europe': '#ffb3ff',
    'Asia': '#90d595',
    'Latin America': '#e48381',
    'Middle East': '#aafbff',
    'North America': '#f7bb5f',
    'Africa': '#eafb50',
}
# map each city name to the group it belongs to
group_lk = data.set_index('name')['group'].to_dict()
# create figure and axes
fig, ax = plt.subplots(figsize=(15, 8))
# flip values from top to bottom: only if necessary!
# data_trans = data_trans[::-1]
# look up one color per bar: city name -> group -> color
bar_colors = [colors[group_lk[city]] for city in data_trans['name']]
ax.barh(data_trans['name'], data_trans['value'], color=bar_colors)
# annotate every bar with its labels and value (Tokyo, Asia, 38194.2)
for i, (value, name) in enumerate(zip(data_trans['value'], data_trans['name'])):
    ax.text(value, i, name, ha='right')  # Tokyo: name
    ax.text(value, i - .25, group_lk[name], ha='right')  # Asia: group name
    ax.text(value, i, value, ha='left')  # 38194.2: value
# add the year at the right-middle portion of the canvas (axes coordinates)
ax.text(1, 0.4, current_year, transform=ax.transAxes, size=46, ha='right')
For convenience, we will create a function, draw_barchart, and move our code into it.
The function also applies the following styling:
# figure and axes that draw_barchart draws on
fig, ax = plt.subplots(figsize=(15, 8))
def draw_barchart(year):
    """Redraw the module-level axes ``ax`` as the bar chart for one year.

    Keeps the ten rows of ``data`` with the largest ``value`` for ``year``,
    draws them as horizontal bars colored through ``group_lk`` and
    ``colors``, labels every bar, and applies the chart styling. Nothing is
    returned; the function only draws on ``ax``.

    Args:
        year: Value matched against the ``year`` column of ``data``; it is
            also printed as the large label on the right of the chart.
    """
    # ascending sort + tail(10): the ten largest values, biggest one last
    data_trans = (
        data[data['year'].eq(year)]
        .sort_values(by='value', ascending=True)
        .tail(10)
    )
    # wipe whatever was drawn on the axes before drawing this year
    ax.clear()
    ax.barh(
        data_trans['name'],
        data_trans['value'],
        color=[colors[group_lk[x]] for x in data_trans['name']],
    )
    # small horizontal offset (1/200 of the longest bar) that keeps the text
    # labels from sitting exactly on the end of each bar
    dx = data_trans['value'].max() / 200
    for i, (value, name) in enumerate(zip(data_trans['value'], data_trans['name'])):
        # name and group go inside the bar, the formatted value outside
        ax.text(value - dx, i, name, size=14, weight=600, ha='right', va='bottom')
        ax.text(value - dx, i - .25, group_lk[name], size=10, color='#444444',
                ha='right', va='baseline')
        ax.text(value + dx, i, f'{value:,.0f}', size=14, ha='left', va='center')
    # large year label, positioned in axes coordinates
    ax.text(1, 0.4, year, transform=ax.transAxes, color='#777777', size=46,
            ha='right', weight=800)
    ax.text(0, 1.06, 'Population (thousands)', transform=ax.transAxes, size=12,
            color="#777777")
    # x axis: thousands separators, ticks on top, no y ticks
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', color='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    # vertical grid lines drawn behind the bars
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    # title and credits
    ax.text(0, 1.12, 'The most populous cities in the world from 1500 to 2018',
            transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, 'by @ai.coding based on @pratapvardhan; credit @jburnmurdoch',
            transform=ax.transAxes, ha='right', color="#777777",
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    # Hide the frame of the axes being drawn. plt.box(False) acts on pyplot's
    # *current* axes, which is not guaranteed to be ``ax``.
    ax.set_frame_on(False)
# preview a single frame of the race
draw_barchart(year=2018)
For the animation, we will be using FuncAnimation from matplotlib.animation. This function makes an animation by repeatedly calling a function (that draws on canvas). The frames argument represents the values you want to run the function on.
# Fresh figure/axes for the animation; draw_barchart draws on the global ax
fig, ax = plt.subplots(figsize=(15, 8))
# call draw_barchart once per frame value, i.e. for every year 1968..2018
animator = animation.FuncAnimation(
    fig,
    func=draw_barchart,
    frames=range(1968, 2019),
)
# embed the animation in the notebook as an HTML/JavaScript player
HTML(animator.to_jshtml())
We can save the video for sharing using the animator.save() function.
# destination file for the rendered animation
output_path = '../Results/population1500-2018.mp4'
animator.save(output_path)